{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "f1029e76-7e61-4296-801d-3a49637b5972",
   "metadata": {},
   "source": [
    "# Assertion-Based Evaluation\n",
    "\n",
    "Evaluating model output with assertions.\n",
    "\n",
    "The cells below install `prettytable`, which is used to render the evaluation results as a nicely formatted text table."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "64f96a0a-bb6e-4dbf-a37a-1f602429fb47",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Use the %pip magic so the package is installed into the running kernel's environment.\n",
    "%pip install prettytable==3.10.2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1b70c49c-2d18-4c16-a58e-5ab644858c9f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Confirm which prettytable version is visible to the running kernel's environment.\n",
    "%pip show prettytable"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3650ce4e-c63a-4f96-968f-bffbfd030d3b",
   "metadata": {},
   "outputs": [],
   "source": [
    "from prettytable import PrettyTable\n",
    "\n",
    "input_relevance_guardrail_data = [\n",
    "    {\n",
    "        \"input\": \"What should I do in New York City in July?\",\n",
    "        \"output\": True,\n",
    "        \"expected\": True\n",
    "    },\n",
    "    {\n",
    "        \"input\": \"Can you help me with my math homework?\",\n",
    "        \"output\": False,\n",
    "        \"expected\": False\n",
    "    },\n",
    "    {\n",
    "        \"input\": \"What’s the capital of France?\",\n",
    "        \"output\": False,\n",
    "        \"expected\": True\n",
    "    },   \n",
    "]\n",
    "\n",
    "# assertion-based evaluation\n",
    "def evaluate_correctness(output, expected):\n",
    "    return 1 if output == expected else 0\n",
    "\n",
    "def calculate_average(scores):\n",
    "    return sum(scores) / len(scores)\n",
    "\n",
    "def create_table(data):\n",
    "    \"\"\"Build a PrettyTable listing every case with its score, plus the average.\n",
    "\n",
    "    Args:\n",
    "        data: Iterable of mappings, each with \"input\", \"output\" and \"expected\" keys.\n",
    "\n",
    "    Returns:\n",
    "        A PrettyTable with columns Input / Output / Expected / Score: one row per\n",
    "        case, then an empty spacer row, then an \"Average\" row whose score is\n",
    "        formatted to four decimal places.\n",
    "    \"\"\"\n",
    "    summary = PrettyTable()\n",
    "    summary.field_names = [\"Input\", \"Output\", \"Expected\", \"Score\"]\n",
    "\n",
    "    # First pass: score every case.\n",
    "    per_case_scores = []\n",
    "    for entry in data:\n",
    "        per_case_scores.append(evaluate_correctness(entry[\"output\"], entry[\"expected\"]))\n",
    "\n",
    "    # Second pass: one table row per case, paired with its score.\n",
    "    for entry, result in zip(data, per_case_scores):\n",
    "        row = [entry[\"input\"], entry[\"output\"], entry[\"expected\"], result]\n",
    "        summary.add_row(row)\n",
    "\n",
    "    # Empty row to visually separate the cases from the summary line.\n",
    "    summary.add_row([\"\", \"\", \"\", \"\"])\n",
    "\n",
    "    # Summary line: mean score at the bottom of the table.\n",
    "    mean_score = calculate_average(per_case_scores)\n",
    "    summary.add_row([\"Average\", \"\", \"\", f\"{mean_score:.4f}\"])\n",
    "\n",
    "    return summary\n",
    "\n",
    "# Build the results table from the evaluation cases and print its plain-text rendering\n",
    "result_table = create_table(input_relevance_guardrail_data)\n",
    "print(result_table)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9a413560-142c-475a-bbb6-b5ff13ec3e37",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
